## Column layout applied to the 2012-GE and 2014-GE files (20 columns).
## The final "DROP" entry names the last column in each row
## (presumably an empty field after a trailing delimiter -- TODO confirm).
colnames2use_early <- c(
  ## voter / district identifiers
  "municipality", "challenge_indicator", "special_designator", "voter_id",
  "enrollment", "ward", "cd", "state_senate", "state_house",
  "county_commissioner",
  ## ballot request, mailing and return
  "request_type", "request_date",
  "sent_type", "sent_date",
  "received_type", "received_date",
  ## outcome and bookkeeping
  "accept_reject", "duplicate_sequence_number", "request_denied",
  "DROP"
)

## Column layout applied to every other election file (22 columns).
## Relative to the early layout, "request_denied" and "accept_reject" sit in
## different positions and "reject_reason" and "other" are added.
colnames2use_late <- c(
  ## voter / district identifiers
  "municipality", "challenge_indicator", "special_designator", "voter_id",
  "enrollment", "ward", "cd", "state_senate", "state_house",
  "county_commissioner",
  ## ballot request, mailing and return
  "request_type", "request_date", "request_denied",
  "sent_type", "sent_date",
  "received_type", "received_date",
  ## outcome and bookkeeping
  "duplicate_sequence_number", "accept_reject", "reject_reason", "other",
  "DROP"
)

## Keep, for every voter_id, only the rows of `df` whose value in `column`
## equals the largest non-missing value of `column` for that voter.
##
## df:     data frame containing a voter_id column.
## column: name (as a string) of the column to maximize; its values must
##         support max() and ==.
##
## Missing values never win against an observed value. A voter whose values
## are all missing keeps all of their rows, so no voter is dropped here.
## Returns `df` with the same columns and row order, minus the filtered rows.
keep_rows_at_voter_max <- function(df, column) {
  ## Nothing to compare: avoids calling max() on an empty vector.
  if (all(is.na(df[[column]]))) {
    return(df)
  }

  ## Maximum per voter, computed over observed values only so that a single
  ## NA cannot turn the whole group's maximum into NA.
  voter_max <- df %>%
    filter(!is.na(.data[[column]])) %>%
    group_by(voter_id) %>%
    summarize(.voter_max = max(.data[[column]]))

  ## .voter_max is NA exactly for voters with no observed value at all.
  df %>%
    left_join(voter_max, by = "voter_id") %>%
    filter(is.na(.voter_max) |
             (!is.na(.data[[column]]) & .data[[column]] == .voter_max)) %>%
    select(-.voter_max)
}

## Date columns that are cleaned and converted to Date inside the loop.
date_columns <- c("request_date", "sent_date", "received_date")

## Obvious year typos (pattern = replacement), anchored at the end of the string.
year_typo_fixes <- c("(2912|2022|2102)$" = "2012",
                     "(2914|2024|2104)$" = "2014",
                     "(2916|2026|2106)$" = "2016",
                     "(2918|2028|2108)$" = "2018")

## Read, clean and de-duplicate one file per election. The result is stored in
## abl_<year>_general for the four general elections handled at the bottom.
for (election2use in elections2use_all) {

  file2read <- paste0("/Users/herron/research/vbm-vulnerable-maine/data/", election2use)

  ## 2012 and 2014 use the early column layout; every other file uses the late one.
  if (election2use %in% c("2012-GE", "2014-GE")) {
    colnames2use <- colnames2use_early
  } else {
    colnames2use <- colnames2use_late
  }

  ## Read every column as character; the first line of the file is skipped and
  ## the names in colnames2use are applied instead.
  abl2use <- read_delim(file = file2read, delim = "|", skip = 1,
                        col_names = colnames2use,
                        col_types = strrep("c", length(colnames2use))) %>%
    ## filter out summary rows (municipality starting with "Total")
    filter(str_detect(string = municipality, pattern = "^Total", negate = TRUE)) %>%
    ## drop observations that have accept/reject codes of Y
    ## NOTE(review): filter() also drops rows whose accept_reject is NA --
    ## confirm that losing those rows is intended.
    filter(accept_reject != "Y")

  ## Strip everything after the first space (time of day), fix obvious year
  ## typos, then convert the date columns from m/d/Y text to Date.
  abl2use <- abl2use %>%
    mutate_at(date_columns, ~ str_replace_all(gsub(" .*$", "", .), year_typo_fixes)) %>%
    mutate_at(date_columns, ~ as.Date(., format = "%m/%d/%Y"))

  ## Find voter ids that appear on more than one row
  duplicates2use <- abl2use$voter_id[duplicated(abl2use$voter_id)]

  ## For duplicated voter id rows, find the conservative code, prioritizing
  ## acceptance over rejection. If neither ACC nor REJ appears among a voter's
  ## codes, the code is treated as missing.
  dta <- abl2use %>%
    filter(voter_id %in% duplicates2use) %>%
    group_by(voter_id) %>%
    summarize(accept_reject_aggregate = paste(accept_reject, collapse = " ")) %>%
    mutate(accept_reject_new = case_when(
      str_detect(string = accept_reject_aggregate, pattern = "ACC") ~ "ACC",
      str_detect(string = accept_reject_aggregate, pattern = "REJ") ~ "REJ",
      TRUE ~ NA_character_)) %>%
    select(-accept_reject_aggregate)

  ## Merge in the new accept/reject code and drop duplicated rows that do not
  ## correspond to it. Voters without a new code (not duplicated, or no
  ## ACC/REJ found) keep all of their rows.
  abl2use <- abl2use %>%
    left_join(x = ., y = dta, by = c("voter_id" = "voter_id")) %>%
    filter(is.na(accept_reject_new) | accept_reject == accept_reject_new) %>%
    select(-accept_reject_new)

  ## Remaining duplicates share the same code: take the latest request date,
  ## then the latest sent date, then the latest received date. Each step only
  ## breaks ties left over by the previous one.
  abl2use <- abl2use %>%
    keep_rows_at_voter_max("request_date") %>%
    keep_rows_at_voter_max("sent_date") %>%
    keep_rows_at_voter_max("received_date")

  ## Remaining duplicates share code and all three dates: take the greatest
  ## duplicate sequence number. The column was read as text, so compare it as a
  ## number ("10" must beat "9"). Values that are not plain digits are treated
  ## as missing -- assumes sequence numbers are non-negative integers.
  abl2use <- abl2use %>%
    mutate(.sequence_number = as.numeric(
      str_extract(str_trim(duplicate_sequence_number), "^[0-9]+$"))) %>%
    keep_rows_at_voter_max(".sequence_number") %>%
    select(-.sequence_number)

  ## If there are still duplicated ids, stop with an error message.
  if (any(duplicated(abl2use$voter_id))) {
    stop("Duplicated voter id numbers in ", election2use, "\n")
  }

  ## Store the cleaned table under its election-specific name.
  if (election2use == "2012-GE") {
    abl_2012_general <- abl2use
  } else if (election2use == "2014-GE") {
    abl_2014_general <- abl2use
  } else if (election2use == "2016-GE") {
    abl_2016_general <- abl2use
  } else if (election2use == "2018-GE") {
    abl_2018_general <- abl2use
  }

}

## Bundle the four per-election tables into one list, named by election label.
abl_files <- setNames(
  list(abl_2012_general, abl_2014_general, abl_2016_general, abl_2018_general),
  c("2012-GE", "2014-GE", "2016-GE", "2018-GE")
)

